[Chapter Sixteen][Previous]
[Next] [Art of
Assembly][Randall Hyde]
Art of Assembly: Chapter Sixteen
- 16.8.4 - A Tiny Assembler
16.8.4 A Tiny Assembler
Although the UCR Standard Library pattern matching routines would probably
not be appropriate for writing a full lexical analyzer or compiler, they
are useful for writing small compilers/assemblers or programs where speed
of compilation/assembly is of little concern. One good example is the simple
nonsymbolic assembler appearing in the SIM886 simulator for an earlier version
of the x86 processors. This "mini-assembler" accepts an x86 assembly
language statement and immediately assembles it into memory. This allows
SIM886 users to create simple assembly language programs within the SIM886
monitor/debugger. Using the Standard Library pattern matching routines makes
it very easy to implement such an assembler.
The grammar for this miniassembler is
Stmt     -> Grp1 reg "," operand |
            Grp2 reg "," reg "," constant |
            Grp3 operand |
            goto operand |
            halt
Grp1     -> load | store | add | sub
Grp2     -> ifeq | iflt | ifgt
Grp3     -> get | put
reg      -> ax | bx | cx | dx
operand  -> reg | constant | [bx] | constant [bx] | "[" constant "]"
constant -> hexdigit constant | hexdigit
hexdigit -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b |
            c | d | e | f
There are some minor semantic details that the program handles (such as
disallowing stores into immediate operands). The assembly code for the miniassembler
follows:
; ASM.ASM
;
.xlist
include stdlib.a
matchfuncs
includelib stdlib.lib
.list
; DSEG holds three things: the sample statements fed to Assemble, the
; variables the match functions fill in while a statement is parsed, and
; the pattern tables handed to the UCR Standard Library matcher.
;
; NOTE(review): the pattern initializers appear to be laid out as
; {match function, parameter, alternate-if-this-fails, next-if-this-
; matches}; confirm against the UCR Standard Library "pattern" structure.
;
; NOTE(review): several identifiers below differ only in case between
; definition and use (Grp1reg/Grp1Reg, Grp2reg/Grp2Reg, Grp3GOTO/Grp3Goto,
; TryGet vs. the TryGET procedure, Spancset/spancset), so this table
; relies on case-insensitive symbol matching.
dseg segment para public 'data'
; Some sample statements to assemble:
; (Per the sample output after the listing, Str1-Str10 assemble and
; Str11-Str14 are rejected with an error message.)
Str1 byte "load ax, 0",0
Str2 byte "load ax, bx",0
Str3 byte "load ax, ax",0
Str4 byte "add ax, 15",0
Str5 byte "sub ax, [bx]",0
Str6 byte "store bx, [1000]",0
Str7 byte "load bx, 2000[bx]",0
Str8 byte "goto 3000",0
Str9 byte "iflt ax, bx, 100",0
Str10 byte "halt",0
Str11 byte "This is illegal",0
Str12 byte "load ax, store",0
Str13 byte "store ax, 1000",0
Str14 byte "ifeq ax, 0, 0",0
; Variables used by the assembler.
; AsmConst  - 16-bit constant operand, written by ConstPat.
; AsmOpcode - base opcode (0..7), written by the Try* routines.
; AsmOprnd1 - first register code (0..3 from MatchReg), or the special
;             sub-opcode (0..3) when AsmOpcode is 7.
; AsmOprnd2 - second register code (0..3) or addressing mode code (4..7,
;             written by MatchGen).
AsmConst word 0
AsmOpcode byte 0
AsmOprnd1 byte 0
AsmOprnd2 byte 0
include stdsets.a ;Bring in the standard char sets.
; Patterns for the assembler:
; Pattern is (
; (load|store|add|sub) reg "," operand |
; (ifeq|iflt|ifgt) reg1 "," reg2 "," const |
; (get|put) operand |
; goto operand |
; halt
; )
;
; With a few semantic additions (e.g., cannot store to a const).
; InstrPat is the entry point Assemble passes to "match": it steps over
; leading white space and then continues with Grp1 either way.
InstrPat pattern {spancset, WhiteSpace,Grp1,Grp1}
; Grp1 tries the LOAD/STORE/ADD/SUB mnemonics; Grp2 is its alternate and
; Grp1Oprnds follows a successful mnemonic match.
Grp1 pattern {sl_Match2,Grp1Strs, Grp2 ,Grp1Oprnds}
Grp1Strs pattern {TryLoad,,Grp1Store}
Grp1Store pattern {TryStore,,Grp1Add}
Grp1Add pattern {TryAdd,,Grp1Sub}
Grp1Sub pattern {TrySub}
; Patterns for the LOAD, STORE, ADD, and SUB instructions.
; (The mnemonics are upper case because Assemble upper-cases the
; statement before matching.)
LoadPat pattern {MatchStr,LoadInstr2}
LoadInstr2 byte "LOAD",0
StorePat pattern {MatchStr,StoreInstr2}
StoreInstr2 byte "STORE",0
AddPat pattern {MatchStr,AddInstr2}
AddInstr2 byte "ADD",0
SubPat pattern {MatchStr,SubInstr2}
SubInstr2 byte "SUB",0
; Patterns for the group one (LOAD/STORE/ADD/SUB) instruction operands:
; white space, register (code goes to AsmOprnd1), white space, comma,
; white space, general operand (MatchGen), then end of line.
Grp1Oprnds pattern {spancset,WhiteSpace,Grp1reg,Grp1reg}
Grp1Reg pattern {MatchReg,AsmOprnd1,,Grp1ws2}
Grp1ws2 pattern {spancset,WhiteSpace,Grp1Comma,Grp1Comma}
Grp1Comma pattern {MatchChar,',',0,Grp1ws3}
Grp1ws3 pattern {spancset,WhiteSpace,Grp1Op2,Grp1Op2}
Grp1Op2 pattern {MatchGen,,,EndOfLine}
; EndOfLine: optional trailing white space followed by the end of the
; string.  Shared by every instruction group.
EndOfLine pattern {spancset,WhiteSpace,NullChar,NullChar}
NullChar pattern {EOS}
; Grp1Op2Reg is the register alternative MatchGen tries first; the
; register code lands in AsmOprnd2.
Grp1Op2Reg pattern {MatchReg,AsmOprnd2}
; Patterns for the group two instructions (IFEQ, IFLT, IFGT):
Grp2 pattern {sl_Match2,Grp2Strs, Grp3 ,Grp2Oprnds}
Grp2Strs pattern {TryIFEQ,,Grp2IFLT}
Grp2IFLT pattern {TryIFLT,,Grp2IFGT}
Grp2IFGT pattern {TryIFGT}
; Group two operands: reg "," reg "," constant, with optional white
; space around each comma.  The registers go to AsmOprnd1 and AsmOprnd2.
Grp2Oprnds pattern {spancset,WhiteSpace,Grp2reg,Grp2reg}
Grp2Reg pattern {MatchReg,AsmOprnd1,,Grp2ws2}
Grp2ws2 pattern {spancset,WhiteSpace,Grp2Comma,Grp2Comma}
Grp2Comma pattern {MatchChar,',',0,Grp2ws3}
Grp2ws3 pattern {spancset,WhiteSpace,Grp2Reg2,Grp2Reg2}
Grp2Reg2 pattern {MatchReg,AsmOprnd2,,Grp2ws4}
Grp2ws4 pattern {spancset,WhiteSpace,Grp2Comma2,Grp2Comma2}
Grp2Comma2 pattern {MatchChar,',',0,Grp2ws5}
Grp2ws5 pattern {spancset,WhiteSpace,Grp2Op3,Grp2Op3}
Grp2Op3 pattern {ConstPat,,,EndOfLine}
; Patterns for the IFEQ, IFLT, and IFGT instructions.
IFEQPat pattern {MatchStr,IFEQInstr2}
IFEQInstr2 byte "IFEQ",0
IFLTPat pattern {MatchStr,IFLTInstr2}
IFLTInstr2 byte "IFLT",0
IFGTPat pattern {MatchStr,IFGTInstr2}
IFGTInstr2 byte "IFGT",0
; Grp3 Patterns:
; GET, PUT and GOTO all take a single general operand; Grp4 (HALT) is
; the alternate when none of the three mnemonics matches.
Grp3 pattern {sl_Match2,Grp3Strs, Grp4 ,Grp3Oprnds}
Grp3Strs pattern {TryGet,,Grp3Put}
Grp3Put pattern {TryPut,,Grp3GOTO}
Grp3Goto pattern {TryGOTO}
; Patterns for the GET and PUT instructions.
GetPat pattern {MatchStr,GetInstr2}
GetInstr2 byte "GET",0
PutPat pattern {MatchStr,PutInstr2}
PutInstr2 byte "PUT",0
GOTOPat pattern {MatchStr,GOTOInstr2}
GOTOInstr2 byte "GOTO",0
; Patterns for the group three (PUT/GET/GOTO) instruction operands:
Grp3Oprnds pattern {spancset,WhiteSpace,Grp3Op,Grp3Op}
Grp3Op pattern {MatchGen,,,EndOfLine}
; Patterns for the group four instruction (HALT).
Grp4 pattern {TryHalt,,,EndOfLine}
HaltPat pattern {MatchStr,HaltInstr2}
HaltInstr2 byte "HALT",0
; Patterns to match the four non-register addressing modes:
;   BXIndrctPat  - "[BX]"
;   BXIndexedPat - constant immediately followed by "[BX]"
;   DirectPat    - "[" constant "]"
;   ImmediatePat - constant
BXIndrctPat pattern {MatchStr,BXIndrctStr}
BXIndrctStr byte "[BX]",0
BXIndexedPat pattern {ConstPat,,,BXIndrctPat}
DirectPat pattern {MatchChar,'[',,DP2}
DP2 pattern {ConstPat,,,DP3}
DP3 pattern {MatchChar,']'}
ImmediatePat pattern {ConstPat}
; Pattern to match a hex constant:
; NOTE(review): if spancset succeeds on zero characters, an empty
; constant would be accepted here -- confirm against the library docs.
HexConstPat pattern {Spancset, xdigits}
dseg ends
cseg segment para public 'code'
assume cs:cseg, ds:dseg
; store- Writes What into the DSEG variable Where no matter what DS
;        holds when the macro is invoked.  DS is pointed at Where's
;        segment for the duration of the write; AX and DS are saved on
;        the stack and restored afterwards.  Only PUSH, MOV and POP are
;        used, so the flags (in particular the carry flag the match
;        functions return) pass through unchanged.
;
;        What is evaluated after AX and DS have been reloaded, so it
;        should be an immediate (as in every use visible in this file),
;        not AX/AL/AH or a DS-relative memory operand.
store           macro   Where, What
                push    ax
                push    ds
                mov     ax, seg Where
                mov     ds, ax
                mov     Where, What
                pop     ds
                pop     ax
                endm
; Pattern matching routines for the assembler.
; Each mnemonic has its own corresponding matching function that
; attempts to match the mnemonic. If it does, it initializes the
; AsmOpcode variable with the base opcode of the instruction.
; TryLoad- Match function for the LOAD mnemonic.  Runs LoadPat through
;          match2; when the carry flag comes back set (a match) it
;          records base opcode 0 in AsmOpcode.  The store macro does not
;          touch the flags, so the caller still sees match2's carry
;          result.  DX and SI are preserved.
TryLoad         proc    far
                push    si
                push    dx
                ldxi    LoadPat
                match2
                jnc     TLExit                  ;No match: AsmOpcode untouched.
                store   AsmOpcode, 0            ;LOAD's base opcode.
TLExit:         pop     dx
                pop     si
                ret
TryLoad         endp
; TryStore- Match function for the STORE mnemonic.  Runs StorePat through
;           match2; on a match (carry set) it records base opcode 1 in
;           AsmOpcode.  Carry is returned exactly as match2 left it, and
;           DX and SI are preserved.
TryStore        proc    far
                push    si
                push    dx
                ldxi    StorePat
                match2
                jnc     TStExit                 ;No match: AsmOpcode untouched.
                store   AsmOpcode, 1            ;STORE's base opcode.
TStExit:        pop     dx
                pop     si
                ret
TryStore        endp
; TryAdd- Match function for the ADD mnemonic.  Runs AddPat through
;         match2; on a match (carry set) it records base opcode 2 in
;         AsmOpcode.  Carry is returned exactly as match2 left it, and
;         DX and SI are preserved.
TryAdd          proc    far
                push    si
                push    dx
                ldxi    AddPat
                match2
                jnc     TAExit                  ;No match: AsmOpcode untouched.
                store   AsmOpcode, 2            ;ADD's base opcode.
TAExit:         pop     dx
                pop     si
                ret
TryAdd          endp
; TrySub- Match function for the SUB mnemonic.  Runs SubPat through
;         match2; on a match (carry set) it records base opcode 3 in
;         AsmOpcode.  Carry is returned exactly as match2 left it, and
;         DX and SI are preserved.
TrySub          proc    far
                push    si
                push    dx
                ldxi    SubPat
                match2
                jnc     TSbExit                 ;No match: AsmOpcode untouched.
                store   AsmOpcode, 3            ;SUB's base opcode.
TSbExit:        pop     dx
                pop     si
                ret
TrySub          endp
; TryIFEQ- Match function for the IFEQ mnemonic.  Runs IFEQPat through
;          match2; on a match (carry set) it records base opcode 4 in
;          AsmOpcode.  Carry is returned exactly as match2 left it, and
;          DX and SI are preserved.
TryIFEQ         proc    far
                push    si
                push    dx
                ldxi    IFEQPat
                match2
                jnc     IEExit                  ;No match: AsmOpcode untouched.
                store   AsmOpcode, 4            ;IFEQ's base opcode.
IEExit:         pop     dx
                pop     si
                ret
TryIFEQ         endp
; TryIFLT- Match function for the IFLT mnemonic.  Runs IFLTPat through
;          match2; on a match (carry set) it records base opcode 5 in
;          AsmOpcode.  Carry is returned exactly as match2 left it, and
;          DX and SI are preserved.
TryIFLT         proc    far
                push    si
                push    dx
                ldxi    IFLTPat
                match2
                jnc     ILExit                  ;No match: AsmOpcode untouched.
                store   AsmOpcode, 5            ;IFLT's base opcode.
ILExit:         pop     dx
                pop     si
                ret
TryIFLT         endp
; TryIFGT- Match function for the IFGT mnemonic.  Runs IFGTPat through
;          match2; on a match (carry set) it records base opcode 6 in
;          AsmOpcode.  Carry is returned exactly as match2 left it, and
;          DX and SI are preserved.
TryIFGT         proc    far
                push    si
                push    dx
                ldxi    IFGTPat
                match2
                jnc     IGExit                  ;No match: AsmOpcode untouched.
                store   AsmOpcode, 6            ;IFGT's base opcode.
IGExit:         pop     dx
                pop     si
                ret
TryIFGT         endp
; TryGET- Match function for the GET mnemonic.  Runs GetPat through
;         match2; on a match (carry set) it records the "special" base
;         opcode 7 in AsmOpcode and GET's sub-opcode 2 in AsmOprnd1.
;         Carry is returned exactly as match2 left it, and DX and SI are
;         preserved.
TryGET          proc    far
                push    si
                push    dx
                ldxi    GetPat
                match2
                jnc     GetExit                 ;No match: variables untouched.
                store   AsmOpcode, 7            ;Special-instruction opcode.
                store   AsmOprnd1, 2            ;GET's sub-opcode.
GetExit:        pop     dx
                pop     si
                ret
TryGET          endp
; TryPut- Match function for the PUT mnemonic.  Runs PutPat through
;         match2; on a match (carry set) it records the "special" base
;         opcode 7 in AsmOpcode and PUT's sub-opcode 3 in AsmOprnd1.
;         Carry is returned exactly as match2 left it, and DX and SI are
;         preserved.
TryPut          proc    far
                push    si
                push    dx
                ldxi    PutPat
                match2
                jnc     PutExit                 ;No match: variables untouched.
                store   AsmOpcode, 7            ;Special-instruction opcode.
                store   AsmOprnd1, 3            ;PUT's sub-opcode.
PutExit:        pop     dx
                pop     si
                ret
TryPut          endp
; TryGOTO- Match function for the GOTO mnemonic.  Runs GOTOPat through
;          match2; on a match (carry set) it records the "special" base
;          opcode 7 in AsmOpcode and GOTO's sub-opcode 1 in AsmOprnd1.
;          Carry is returned exactly as match2 left it (the store macro
;          does not touch the flags), and DX and SI are preserved.
;
;          The exit label is unique to this procedure; it used to share
;          the name NoGMatch with TryGET, which only assembles while
;          labels are procedure-scoped.
TryGOTO         proc    far
                push    dx
                push    si
                ldxi    GOTOPat
                match2
                jnc     NoGotoMatch             ;No match: variables untouched.
                store   AsmOpcode, 7            ;Special-instruction opcode.
                store   AsmOprnd1, 1            ;GOTO's sub-opcode.
NoGotoMatch:    pop     si
                pop     dx
                ret
TryGOTO         endp
; TryHalt- Match function for the HALT mnemonic.  Runs HaltPat through
;          match2; on a match (carry set) it records the "special" base
;          opcode 7 in AsmOpcode and zeroes both operand fields (HALT's
;          sub-opcode is 0 and it has no operand).  Carry is returned
;          exactly as match2 left it, and DX and SI are preserved.
TryHalt         proc    far
                push    si
                push    dx
                ldxi    HaltPat
                match2
                jnc     HaltExit                ;No match: variables untouched.
                store   AsmOpcode, 7            ;Special-instruction opcode.
                store   AsmOprnd1, 0            ;HALT's sub-opcode.
                store   AsmOprnd2, 0            ;No addressing mode.
HaltExit:       pop     dx
                pop     si
                ret
TryHalt         endp
; MatchReg- Match function that recognizes one of the register names
;           AX, BX, CX or DX (upper case only; Assemble upper-cases the
;           statement before matching).
;
; On entry: DS:SI points at the byte that receives the register code
;           (AX=0, BX=1, CX=2, DX=3).
;           ES:DI points at the text to examine.
;           CX    points one past the last character that may be used.
;
; On exit:  Carry=1 on success: the code has been stored at DS:[SI] and
;                    AX = DI+2 (just past the register name).
;           Carry=0 on failure: AX = DI and nothing has been stored.
;
; The two-character bounds check is made before anything is examined or
; written, so a name that would run past CX is rejected without
; overwriting the operand variable.
MatchReg        proc    far
                lea     ax, 2[di]               ;Where a match would end.
                cmp     ax, cx                  ;Room for two characters?
                ja      BadReg                  ;No: cannot be a register.

                cmp     byte ptr es:1[di], 'X'  ;Every name ends in 'X'.
                jne     BadReg

                xor     ax, ax                  ;886 "AX" reg code.
                cmp     byte ptr es:[di], 'A'   ;AX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'B'   ;BX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'C'   ;CX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'D'   ;DX?
                je      GoodReg

BadReg:         clc                             ;Report failure...
                mov     ax, di                  ; ...with nothing consumed.
                ret

GoodReg:        mov     ds:[si], al             ;Save register opcode.
                lea     ax, 2[di]               ;Skip past register.
                stc                             ;Report success.
                ret
MatchReg        endp
; MatchGen- Match function for a general operand.  The alternatives are
;           tried in this order and the first one that matches wins:
;
;               register        AsmOprnd2 = register code (stored by
;                               MatchReg through Grp1Op2Reg)
;               [BX]            AsmOprnd2 = 4
;               xxxx[BX]        AsmOprnd2 = 5
;               [xxxx]          AsmOprnd2 = 6
;               xxxx            AsmOprnd2 = 7
;
;           Forms containing a constant go through ConstPat, which
;           leaves the value in AsmConst.  Carry on return is whatever
;           the last match2 call produced (set = matched); the store
;           macro and the jumps do not disturb it.  DX and SI are
;           preserved.
MatchGen        proc    far
                push    dx
                push    si

                ldxi    Grp1Op2Reg              ;Register operand?
                match2
                jc      MGExit                  ;Code already stored.

                ldxi    BXIndrctPat             ;"[BX]"?
                match2
                jc      MGIndirect

                ldxi    BXIndexedPat            ;"xxxx[BX]"?
                match2
                jc      MGIndexed

                ldxi    DirectPat               ;"[xxxx]"?
                match2
                jc      MGDirect

                ldxi    ImmediatePat            ;"xxxx"?
                match2
                jnc     MGExit                  ;Nothing matched: carry clear.
                store   AsmOprnd2, 7
                jmp     MGExit

MGIndirect:     store   AsmOprnd2, 4
                jmp     MGExit

MGIndexed:      store   AsmOprnd2, 5
                jmp     MGExit

MGDirect:       store   AsmOprnd2, 6

MGExit:         pop     si
                pop     dx
                ret
MatchGen        endp
; ConstPat- Match function for a hexadecimal constant.  Runs HexConstPat
;           through match2; if that reports a match (carry set) the
;           text at ES:DI is converted with atoh and the result is
;           written to AsmConst.
;
;           AX as returned by match2 is saved around the conversion and
;           restored, and DS is switched to AsmConst's segment only for
;           the store.  Carry is forced back on afterwards so the caller
;           sees success whatever atoh did to the flags.  On a failed
;           match nothing is stored and match2's carry (clear) is
;           returned.  DX and SI are preserved.
ConstPat        proc    far
                push    si
                push    dx
                ldxi    HexConstPat
                match2
                jnc     CPExit                  ;Not a constant.

                push    ax                      ;Keep match2's AX result.
                push    ds
                mov     ax, seg AsmConst
                mov     ds, ax
                atoh                            ;AX = value of the hex digits.
                mov     AsmConst, ax
                pop     ds
                pop     ax
                stc                             ;Still a successful match.

CPExit:         pop     dx
                pop     si
                ret
ConstPat        endp
; Assemble- Assembles one statement and prints the result.
;
; On entry: ES:DI points at the zero-terminated statement.  DS must
;           address DSEG, because AsmOpcode, AsmOprnd1, AsmOprnd2 and
;           AsmConst are referenced directly.
;
; Output:   "Assembling: " and the statement (upper-cased by strupr, as
;           the sample output shows), then either
;             - the opcode byte in hex, followed by the 16-bit constant
;               (low byte, then high byte) if the instruction has one, or
;             - an error message.
;
; The opcode byte is laid out as iii rr mmm: AsmOpcode in bits 7-5,
; AsmOprnd1 in bits 4-3 and AsmOprnd2 in bits 2-0.
Assemble        proc    near

; Echo the statement being assembled.

                print
                byte    "Assembling: ",0
                strupr
                puts
                putcr

; Parse it.  CX is cleared before calling match, as in the original
; listing.

                ldxi    InstrPat
                xor     cx, cx
                match
                jnc     AsmBadSyntax

; Semantic checks the grammar does not express:
;   STORE (opcode 1) and GET (special 2) may not use immediate mode (7);
;   GOTO (special 1) must use immediate mode.

                cmp     AsmOpcode, 1            ;STORE?
                je      AsmNoImmediate
                cmp     AsmOpcode, 7            ;Special group?
                jne     AsmEncode
                cmp     AsmOprnd1, 1            ;GOTO?
                je      AsmGotoCheck
                cmp     AsmOprnd1, 2            ;GET?
                jne     AsmEncode

AsmNoImmediate: cmp     AsmOprnd2, 7            ;Immediate operand?
                jne     AsmEncode
                print
                byte    "Syntax error: store/get immediate not allowed."
                byte    " Try Again",cr,lf,0
                jmp     AsmExit

AsmGotoCheck:   cmp     AsmOprnd2, 7            ;Immediate operand?
                je      AsmEncode
                print
                byte    "Syntax error: GOTO only allows immediate "
                byte    "mode.",cr,lf
                byte    0
                jmp     AsmExit

; Pack the three fields into one byte and print it.

AsmEncode:      mov     al, AsmOpcode
                shl     al, 1
                shl     al, 1
                or      al, AsmOprnd1
                shl     al, 1
                shl     al, 1
                shl     al, 1
                or      al, AsmOprnd2
                puth

; A constant follows for IFEQ/IFLT/IFGT (opcodes 4..6) and for any
; other instruction whose addressing mode is 5, 6 or 7.

                cmp     AsmOpcode, 4
                jb      AsmModeCheck
                cmp     AsmOpcode, 6
                jbe     AsmEmitConst
AsmModeCheck:   cmp     AsmOprnd2, 5
                jb      AsmExit

; Print the constant: low byte, space, high byte.  AH still holds the
; high byte after AL has been reused for the space.

AsmEmitConst:   mov     al, ' '
                putc
                mov     ax, AsmConst
                puth
                mov     al, ' '
                putc
                xchg    al, ah
                puth
                jmp     AsmExit

AsmBadSyntax:   print
                byte    "Syntax error in instruction."
                byte    cr,lf,0

AsmExit:        putcr
                ret
Assemble        endp
; Main- Test driver.  Points DS and ES at DSEG, initializes the Standard
;       Library memory manager, then passes each sample statement
;       (Str1..Str14, in order) to Assemble before exiting to DOS.
Main            proc
                mov     ax, seg dseg            ;Set up the segment registers
                mov     ds, ax
                mov     es, ax
                meminit

; One "lesi / call Assemble" pair is generated per sample string.

                irp     TestStr, <Str1,Str2,Str3,Str4,Str5,Str6,Str7,Str8,Str9,Str10,Str11,Str12,Str13,Str14>
                lesi    TestStr
                call    Assemble
                endm

Quit:           ExitPgm
Main            endp
cseg ends
; Stack segment.  As written here the filler string "stack " is six
; bytes long, so 256 copies reserve 1536 bytes of stack.
sseg segment para stack 'stack'
stk db 256 dup ("stack ")
sseg ends
; Sixteen uninitialized bytes in a segment of class 'zzzzzz'.
; NOTE(review): presumably this marks the end of the program image for
; the Standard Library memory manager (meminit) -- confirm against the
; library documentation before moving or removing it.
zzzzzzseg segment para public 'zzzzzz'
LastBytes db 16 dup (?)
zzzzzzseg ends
end Main
Sample Output:
Assembling: LOAD AX, 0
07 00 00
Assembling: LOAD AX, BX
01
Assembling: LOAD AX, AX
00
Assembling: ADD AX, 15
47 15 00
Assembling: SUB AX, [BX]
64
Assembling: STORE BX, [1000]
2E 00 10
Assembling: LOAD BX, 2000[BX]
0D 00 20
Assembling: GOTO 3000
EF 00 30
Assembling: IFLT AX, BX, 100
A1 00 01
Assembling: HALT
E0
Assembling: THIS IS ILLEGAL
Syntax error in instruction.
Assembling: LOAD AX, STORE
Syntax error in instruction.
Assembling: STORE AX, 1000
Syntax error: store/get immediate not allowed. Try Again
Assembling: IFEQ AX, 0, 0
Syntax error in instruction.
- 16.8.4 - A Tiny Assembler
Art of Assembly: Chapter Sixteen - 29 SEP 1996
[Chapter Sixteen][Previous]
[Next] [Art of
Assembly][Randall Hyde]